{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Atlas\n",
    "\n",
    "\n",
    ">[Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both small and internet scale unstructured datasets by `Nomic`. \n",
    "\n",
    "This notebook shows you how to use functionality related to the `AtlasDB` vectorstore."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install spacy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "is_executing": true
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "!python3 -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install nomic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "is_executing": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
    "from langchain.text_splitter import SpacyTextSplitter\n",
    "from langchain.vectorstores import AtlasDB\n",
    "from langchain.document_loaders import TextLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "ATLAS_TEST_API_KEY = '7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "loader = TextLoader('../../../state_of_the_union.txt')\n",
    "documents = loader.load()\n",
    "text_splitter = SpacyTextSplitter(separator='|')\n",
    "texts = []\n",
    "for doc in text_splitter.split_documents(documents):\n",
    "    texts.extend(doc.page_content.split('|'))\n",
    "                 \n",
    "texts = [e.strip() for e in texts]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "is_executing": true
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "db = AtlasDB.from_texts(texts=texts,\n",
    "                        name='test_index_'+str(time.time()), # unique name for your vector store\n",
    "                        description='test_index', #a description for your vector store\n",
    "                        api_key=ATLAS_TEST_API_KEY,\n",
    "                        index_kwargs={'build_topic_model': True})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "db.project.wait_for_project_lock()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "            <strong><a href=\"https://atlas.nomic.ai/dashboard/project/ee2354a3-7f9a-4c6b-af43-b0cda09d7198\">test_index_1677255228.136989</strong></a>\n",
       "            <br>\n",
       "            A description for your project 508 datums inserted.\n",
       "            <br>\n",
       "            1 index built.\n",
       "            <br><strong>Projections</strong>\n",
       "<ul>\n",
       "<li>test_index_1677255228.136989_index. Status Completed. <a target=\"_blank\" href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">view online</a></li></ul><hr><script>\n",
       "            destroy = function() {\n",
       "                document.getElementById(\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\").remove()\n",
       "            }\n",
       "        </script>\n",
       "\n",
       "        <h4>Projection ID: db996d77-8981-48a0-897a-ff2c22bbf541</h4>\n",
       "        <div class=\"actions\">\n",
       "            <div id=\"hide\" class=\"action\" onclick=\"destroy()\">Hide embedded project</div>\n",
       "            <div class=\"action\" id=\"out\">\n",
       "                <a href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\" target=\"_blank\">Explore on atlas.nomic.ai</a>\n",
       "            </div>\n",
       "        </div>\n",
       "        \n",
       "        <iframe class=\"iframe\" id=\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\" allow=\"clipboard-read; clipboard-write\" src=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">\n",
       "        </iframe>\n",
       "\n",
       "        <style>\n",
       "            .iframe {\n",
       "                /* vh can be **very** large in vscode ipynb. */\n",
       "                height: min(75vh, 66vw);\n",
       "                width: 100%;\n",
       "            }\n",
       "        </style>\n",
       "        \n",
       "        <style>\n",
       "            .actions {\n",
       "              display: block;\n",
       "            }\n",
       "            .action {\n",
       "              min-height: 18px;\n",
       "              margin: 5px;\n",
       "              transition: all 500ms ease-in-out;\n",
       "            }\n",
       "            .action:hover {\n",
       "              cursor: pointer;\n",
       "            }\n",
       "            #hide:hover::after {\n",
       "                content: \" X\";\n",
       "            }\n",
       "            #out:hover::after {\n",
       "                content: \"\";\n",
       "            }\n",
       "        </style>\n",
       "        "
      ],
      "text/plain": [
       "AtlasProject: <{'id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'owner': '9c29afbb-a002-4d49-958e-ecf5ae1351ac', 'project_name': 'test_index_1677255228.136989', 'creator': 'auth0|63efc4b5462246f4d9a6ecf2', 'description': 'A description for your project', 'opensearch_index_id': 'f61fb8dd-0abf-4f31-9130-41870e443902', 'is_public': True, 'project_fields': ['atlas_id', 'text'], 'unique_id_field': 'atlas_id', 'modality': 'text', 'total_datums_in_project': 508, 'created_timestamp': '2023-02-24T16:13:50.313363+00:00', 'atlas_indices': [{'id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'project_id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'index_name': 'test_index_1677255228.136989_index', 'indexed_field': 'text', 'created_timestamp': '2023-02-24T16:13:52.957101+00:00', 'updated_timestamp': '2023-02-24T16:14:03.469621+00:00', 'atoms': ['charchunk', 'document'], 'colorable_fields': [], 'embedders': [{'id': '7ec0868a-4eed-4414-a482-25cce9803e1b', 'atlas_index_id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'ready': True, 'model_name': 'NomicEmbed', 'hyperparameters': {'norm': 'both', 'batch_size': 20, 'polymerize_by': 'charchunk', 'dataset_buffer_size': 1000}}], 'nearest_neighbor_indices': [{'id': '86f8e3ff-e07c-4678-a4d7-144db4b0301d', 'index_name': 'NomicOrganize', 'ready': True, 'hyperparameters': {'dim': 384, 'space': 'l2'}, 'atom_strategies': ['document']}], 'projections': [{'id': 'db996d77-8981-48a0-897a-ff2c22bbf541', 'projection_name': 'NomicProject', 'ready': True, 'hyperparameters': {'spread': 1.0, 'n_epochs': 50, 'n_neighbors': 15}, 'atom_strategies': ['document'], 'created_timestamp': '2023-02-24T16:13:52.979561+00:00', 'updated_timestamp': '2023-02-24T16:14:03.466309+00:00'}]}], 'insert_update_delete_lock': False}>"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db.project"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
